version: May 21, 2020


This is the analysis pipeline to analyze data generated from 3D models of fate tracked Montastraea cavernosa infected with stony coral tissue loss disease *** ### All analyses performed with R version 3.5.0

Basic setup of R environment


Loading required packages

For the following analyses we will require the use of a number of different R packages. Most of which can be sourced from CRAN, but some must be downloaded from GitHub. We can use the following code to ld in the packages and install any packages not previously installed in the R console.

#setwd("link to github")
if (!require("pacman")) install.packages("pacman")
pacman::p_load(ggplot2, stringr, gridExtra, ggpubr, Rmisc, FSA, rcompanion, RColorBrewer, dplyr, vegan, nparcomp, RVAideMemoire, MANOVA.RM, pairwiseAdonis, PMCMR, PMCMRplus, patchwork)
pacman::p_load_gh("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")



Loading and subsetting


Loading the dataset into R, and subsetting as needed, we will be making multiple subsets, subsetting by time point and by site to make things easier to workwith.

corr <- read.csv("../data/total.correlation.csv", head=T)
str(corr)
head(corr)

# Changing values to cm^2
corr$total.colony.size <- corr$total.colony.size*10000
corr$healthy.area <- corr$healthy.area*10000
corr$disease.area <- corr$disease.area*10000

# I am reordering the dates in my dataset to ensure that they display chronologically.
corr$site=factor(corr$site, levels=unique(corr$site)) 
levels(corr$date)
corr$date = factor(corr$date, levels(corr$date)[c(3, 4, 1, 2)])
levels(corr$date)

# Subsetting by timepoint, and then resetting the factor level for each timepoint
corr.t1 <- subset(corr, date == '8.24.18')
droplevels(corr.t1$date)

corr.t2 <- subset(corr, date == '9.11.18')
droplevels(corr.t2$date)

corr.t3 <- subset(corr, date == '11.8.18')
droplevels(corr.t3$date)

corr.t4 <- subset(corr, date == '12.17.18')
droplevels(corr.t4$date)

# This will be used when we analyze rates of loss and disease progression
corr.loss <- subset(corr, date != '8.24.18')
droplevels(corr.loss$date)



# Subsetting by Site (you can reset the factor for each site if needed from the code above)
corr.bc1 <- subset(corr, site == 'BC1')
corr.t328 <- subset(corr, site == 'T328')
corr.ftl4 <- subset(corr, site == 'FTL4')

# Average colony size from each site
mean(corr.bc1$total.colony.size)
mean(corr.t328$total.colony.size)
mean(corr.ftl4$total.colony.size)

# Subsetting the first time point from each location
corr.t1.bc1 <- subset(corr, site == "BC1")
corr.t1.t328 <- subset(corr, site == 'T328')
corr.t1.ftl4 <- subset(corr, site == "FTL4")

# Initial average colony size for each site
mean(corr.t1.bc1$total.colony.size)
mean(corr.t1.t328$total.colony.size)
mean(corr.t1.ftl4$total.colony.size)



Assessing normality


A Shapiro-Wilk Normality Test will be run on our 3 main variables to assess normality

shapiro.total <- shapiro.test(corr$total.colony.size)
shapiro.total
## 
##  Shapiro-Wilk normality test
## 
## data:  corr$total.colony.size
## W = 0.68843, p-value = 5.694e-13
shapiro.healthy <-  shapiro.test(corr$healthy.area)
shapiro.healthy
## 
##  Shapiro-Wilk normality test
## 
## data:  corr$healthy.area
## W = 0.6942, p-value = 7.679e-13
shapiro.disease <- shapiro.test(corr$disease.area)
shapiro.disease
## 
##  Shapiro-Wilk normality test
## 
## data:  corr$disease.area
## W = 0.6443, p-value = 6.517e-14



Friedman Rank Sum Test


Performs a Friedman rank sum test with unreplicated blocked data on the three major area measurments: total colony area, healthy tissue area, and lesion area.

fried.total <- friedmanTest(y=corr$total.colony.size, groups = corr$date, blocks = corr$colony.id)
fried.total 
## 
##  Friedman rank sum test
## 
## data:  corr$total.colony.size , corr$date and corr$colony.id
## Friedman chi-squared = 48.555, df = 3, p-value = 1.623e-10
fried.disease<- friedmanTest(y=corr$disease.area, groups = corr$date, blocks = corr$colony.id)
fried.disease
## 
##  Friedman rank sum test
## 
## data:  corr$disease.area , corr$date and corr$colony.id
## Friedman chi-squared = 14.017, df = 3, p-value = 0.002882
fried.healthy <- friedmanTest(y=corr$healthy.area, groups = corr$date, blocks = corr$colony.id)
fried.healthy
## 
##  Friedman rank sum test
## 
## data:  corr$healthy.area , corr$date and corr$colony.id
## Friedman chi-squared = 41.294, df = 3, p-value = 5.664e-09
fried.lesion <- friedmanTest(y = corr$lesion.count, groups = corr$date, blocks = corr$colony.id)
fried.lesion
## 
##  Friedman rank sum test
## 
## data:  corr$lesion.count , corr$date and corr$colony.id
## Friedman chi-squared = 7.4536, df = 3, p-value = 0.05876



Pairwise Comparisons using the Nemenyi test


Pairwise comparisons of significant Friedman’s tests and manually adjusting the p values using a Bonferroni correction

# Total colony size
post.total <- posthoc.friedman.nemenyi.test(total.colony.size ~ date | colony.id, data = corr)
summary(post.total)
## 
##  Pairwise comparisons using Nemenyi multiple comparison test 
##              with q approximation for unreplicated blocked data
## data: total.colony.size and date and colony.id
## P value adjustment method: none
## H0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#taking pvalues from Nemenyi test and manually correcting them
total.val <- c(0.0135,4.8e-07, 1.1e-09, 0.0875, 0.0044, 0.7458 )

# Using a bonferroni correction
adjust.total.val <- p.adjust(total.val, method = 'bonferroni')


# Pairwise comparisons using Nemenyi multiple comparison test   
# with q approximation for unreplicated blocked data 
# 
# data:  total.colony.size and date and colony.id 
# 
# 
# P value adjustment method: none 
#          H0              statistic p.value   adjusted p
# 1  8.24.18  =  9.11.18  4.269075  0.0135      0.018
# 2  8.24.18  =  11.8.18  7.589466 4.8e-07      2.88e-06
# 3 8.24.18  =  12.17.18  9.012491 1.1e-09      6.60e-09
# 4  9.11.18  =  11.8.18  3.320392  0.0875      5.25e-01
# 5 9.11.18  =  12.17.18  4.743416  0.0044      2.64e-02
# 6 11.8.18  =  12.17.18  1.423025  0.7458      1.00e+00


############################################
# Healthy tissue area
post.healthy <- posthoc.friedman.nemenyi.test(healthy.area ~ date | colony.id, data = corr)
summary(post.healthy)
## 
##  Pairwise comparisons using Nemenyi multiple comparison test 
##              with q approximation for unreplicated blocked data
## data: healthy.area and date and colony.id
## P value adjustment method: none
## H0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#taking pvalues from Nemenyi test and manually correcting them

healthy.val <- c(0.0044, 2.9e-06,  1.9e-08, 0.3358,0.0497, 0.8078 )

# Using a bonferroni correction
adjust.healthy.val <- p.adjust(healthy.val, method = 'bonferroni')

# Pairwise comparisons using Nemenyi multiple comparison test   
# with q approximation for unreplicated blocked data 
# 
# data:  healthy.area and date and colony.id 
# 
# 
# P value adjustment method: none 
#     H0                  statistic p.value     padjust
# 1  8.24.18  =  9.11.18  4.743416  0.0044      2.640e-02
# 2  8.24.18  =  11.8.18  7.115125 2.9e-06      1.740e-05
# 3 8.24.18  =  12.17.18  8.380036 1.9e-08      1.140e-07
# 4  9.11.18  =  11.8.18  2.371708  0.3358      1.000e+00
# 5 9.11.18  =  12.17.18  3.636619  0.0497      2.982e-01
# 6 11.8.18  =  12.17.18  1.264911  0.8078      2.982e-01


############################################
#Lesion area
post.disease <- posthoc.friedman.nemenyi.test(disease.area ~ date | colony.id, data = corr)
summary(post.disease)
## 
##  Pairwise comparisons using Nemenyi multiple comparison test 
##              with q approximation for unreplicated blocked data
## data: disease.area and date and colony.id
## P value adjustment method: none
## H0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#taking pvalues from Nemenyi test and manually correcting them
disease.val <- c(0.536, 0.970,  0.114, 0.808,0.002,0.037 )



# Using a bonferroni correction
adjust.disease.val <- p.adjust(disease.val, method = 'bonferroni')





# Pairwise comparisons using Nemenyi multiple comparison test   
# with q approximation for unreplicated blocked data 
# 
# data:  disease.area and date and colony.id 
# 
# 
# P value adjustment method: none 
#         H0              statistic p.value   padjust
# 1  8.24.18  =  9.11.18 1.8973666   0.536    1.000
# 2  8.24.18  =  11.8.18 0.6324555   0.970    1.000
# 3 8.24.18  =  12.17.18 3.1622777   0.114    0.684
# 4  9.11.18  =  11.8.18 1.2649111   0.808    1.000
# 5 9.11.18  =  12.17.18 5.0596443   0.002    0.012
# 6 11.8.18  =  12.17.18 3.7947332   0.037    0.222



Running PERMANOVA in R


We will be using the adonis() function in vegan. We will run a total of 999 permutations and test the effects of site, time, and the interaction between the two factors.

set.seed(999)
perm <- adonis(formula = corr[c(5:7)] ~ site * date, data = corr, method = "euclidian", perm = 999)
perm 
## 
## Call:
## adonis(formula = corr[c(5:7)] ~ site * date, data = corr, permutations = 999,      method = "euclidian") 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df  SumsOfSqs   MeanSqs F.Model      R2 Pr(>F)    
## site       2  318526671 159263335  7.9460 0.15790  0.001 ***
## date       3   13288496   4429499  0.2210 0.00659  0.893    
## site:date  6    1839834    306639  0.0153 0.00091  1.000    
## Residuals 84 1683620228  20043098         0.83460           
## Total     95 2017275229                   1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

PERMANOVA reveals that Site has a significant effect on tissue areas.



Pairwise PERMANOVA for multiple comparisons


We found that Site was a significant factor in our PERMANOVA, so we are using a pairwise PERMANOVA using the function pairwise.adonis() from the package pairwiseAdonis to see where differences are between our sites.

set.seed(999)
perm.pair <- pairwise.adonis(corr[5:7],factors=corr$site,
                             sim.method='euclidian',p.adjust.m='bonferroni', perm = 999)
perm.pair
##          pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted sig
## 1  BC1 vs T328  1 313927982 13.154125 0.18486806   0.001      0.003   *
## 2  BC1 vs FTL4  1  48874191  9.027413 0.12032153   0.008      0.024   .
## 3 T328 vs FTL4  1 131553695  4.925368 0.07359494   0.028      0.084

BC1 is significantly different from both T328 and FTL4.



Correlation Analyses


Multiple correlation analyses were carried out to see how different colony metrics affected disease manifestation and progress.

# Looking at correlation between total colony size and disease lesion area

colonysize.vs.lesionarea <- cor.test(corr$total.colony.size,corr$disease.area, method="spearman", use="complete.obs")
colonysize.vs.lesionarea
## 
##  Spearman's rank correlation rho
## 
## data:  corr$total.colony.size and corr$disease.area
## S = 125790, p-value = 0.1535
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1468166
# Looking at correlation between total colony size and the number of disease lesions on a colony

colonysize.vs.lesioncount<- cor.test(corr$total.colony.size,corr$lesion.count, method="spearman", use="complete.obs")
colonysize.vs.lesioncount
## 
##  Spearman's rank correlation rho
## 
## data:  corr$total.colony.size and corr$lesion.count
## S = 120610, p-value = 0.07599
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1819743
# Looking at correlation between the amount of lesion tissue area and number of disease lesions

lesionarea.vs.lesioncount <- cor.test(corr$disease.area,corr$lesion.count, method="spearman", use="complete.obs")
lesionarea.vs.lesioncount
## 
##  Spearman's rank correlation rho
## 
## data:  corr$disease.area and corr$lesion.count
## S = 48640, p-value = 8.23e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6701035

The number of disease lesions were positively correlated with the amount of disease lesion tissue area. However, colony size was not correlated with disease lesion area or the number of disease lesions. So, bigger colonies have no more (or less) disease than smaller colonies.

Analysis of Lesion Count using Kruskal-Wallis and Dunn’s Test


Looking at differences of lesion count among site.

kruskal.test(lesion.count ~ site, data = corr)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  lesion.count by site
## Kruskal-Wallis chi-squared = 17.076, df = 2, p-value = 0.0001958
dunnTest(lesion.count ~ site, data = corr, method = 'bh')
## Dunn (1964) Kruskal-Wallis multiple comparison
##   p-values adjusted with the Benjamini-Hochberg method.
##    Comparison          Z      P.unadj        P.adj
## 1  BC1 - FTL4  0.7428398 4.575787e-01 0.4575786786
## 2  BC1 - T328 -3.1580994 1.588014e-03 0.0023820209
## 3 FTL4 - T328 -3.9595527 7.509028e-05 0.0002252708

Plotting


We are now going to make a number of box plots and correlation plots to visualize the data we just analyzed. I will display the code for one of each (as the code is the same just replacing variable names) and then I will compile the plots using the R package patchwork.

Box Plots

# This forces y axis labels to have 1 decimal place.
scale <- function(x) sprintf("%.0f", x)

# Plotting a boxplot of mean total colony size for each site at each timepoint. 
total.colony.box.1 <-
  ggplot(data = corr, aes(x = date, y = total.colony.size))+
  geom_boxplot(aes(fill = site), alpha = 1, outlier.shape = NA, color = "black") +
  geom_point(aes(fill = site), color = "black", size = 3, position = position_jitterdodge()) +
  scale_fill_manual(values = c("#7BA46C", "#EACF9E", "#008D91")) +
  scale_x_discrete(labels = c(expression('T'[1]), expression('T'[2]), expression('T'[3]), expression('T'[4])))+
  labs(x = "Time", y = bquote("Total Colony Area" ~ (cm^2)),fill = 'Site') +
  scale_y_continuous(labels = scale)+
  theme(legend.title = element_blank(),
        legend.text = element_text(color = "black", size = 25),
        legend.background = element_blank())+
  facet_grid(.~site,scales="free")
total.colony.box.1

total.colony.box <- total.colony.box.1 + theme(
  panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"),
  panel.background = element_rect(fill = '#F5F5F5'),
  plot.title = element_text(hjust = 0.5), 
  axis.line = element_line(colour = "black"), 
  axis.ticks = element_line(color="black"), 
  text = element_text(size=15, color="black"), 
  axis.text.x=element_text(size=12, color="black"), 
  axis.text.y=element_text(size=12, color="black"),
  axis.title.y = element_text(size = 18, color = 'black'),
  plot.margin = unit(c(0.5, 1, 0.5, 1), "cm"))+ 
  rremove("legend")

total.colony.box

We repeat this using healthy tissue area, disease lesion area and lesion count.

Correlation Plots

Here I will be showing the code for a significant and a non-significant correlation plot as there will be slight difference in the code showing the correlation trend.

# No correlation
lesion.count.colony.size.1 <-ggplot(corr, aes(x = total.colony.size, y = lesion.count, color = site))+
  geom_point(size = 5)+
  scale_color_manual(values = c("#7BA46C", "#EACF9E", "#008D91"))+
  labs(x = bquote('Total Colony Size'~(cm^2)), y = bquote('Cattle Tag Error'~(cm^2)))+
  scale_y_continuous(labels = scale)

lesion.count.colony.size <- lesion.count.colony.size.1 + 
  theme(panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"),                 panel.background = element_rect(fill = '#F5F5F5'),
        plot.title = element_text(hjust = 0.5), 
        axis.line = element_line(colour = "black"), 
        axis.ticks = element_line(color="black"), 
        text = element_text(size=15, color="black"), 
        axis.text.x=element_text(size=12, color="black"), 
        axis.text.y=element_text(size=12, color="black"),
        axis.title.y = element_text(size = 18, color = 'black'),
        plot.margin = unit(c(0.5, 1, 0.5, 1.75), "cm"))+
  rremove("legend")
lesion.count.colony.size 

# Significant Correlation

lesion.count.disease.area.1 <-ggplot(corr, aes(x = disease.area, y = lesion.count, color = site))+
  geom_point(size = 5)+
  geom_smooth(method = "lm", fill = 'grey30', color = 'grey30')+
  scale_color_manual(values = c("#7BA46C", "#EACF9E", "#008D91"))+
  labs(x = bquote('Disease Lesion Area'~(cm^2)), y = "Lesion Count") +
  scale_y_continuous(labels = scale)
lesion.count.disease.area.1

lesion.count.disease.area <- lesion.count.disease.area.1 + 
  theme(panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = "white"),                 panel.background = element_rect(fill = '#F5F5F5'),
        plot.title = element_text(hjust = 0.5), 
        axis.line = element_line(colour = "black"), 
        axis.ticks = element_line(color="black"), 
        text = element_text(size=15, color="black"), 
        axis.text.x=element_text(size=12, color="black"), 
        axis.text.y=element_text(size=12, color="black"),
        axis.title.y = element_text(size = 18, color = 'black'),
        plot.margin = unit(c(0.5, 1, 0.5, 1.57), "cm"))+
  rremove("legend")
lesion.count.disease.area 



Putting plots together using patchwork.


patchwork adds ggplot2 plots together to compose multiplot layouts using simple operators.

lesion.plots.patch <- total.colony.box + healthy.colony.box + disease.area.box + lesion.count.box + colony.size.disease.area + lesion.count.disease.area + lesion.count.colony.size + guide_area() + plot_layout(nrow = 4, guides = 'collect')+plot_annotation(tag_levels = 'a')

## Save
ggsave("../figures/lesion.plots.tiff",  plot= lesion.plots.patch, width=20, height=20, units="in", dpi=600)



***

Rates of Loss


Correlation analyses


Correlation analyses were run on rates of loss and various colony metrics.

## Rate of loss 1 and total colony size at T2
cor.test(corr.t2$loss,corr.t2$total.colony.size, method="spearman", use="complete.obs")
## 
##  Spearman's rank correlation rho
## 
## data:  corr.t2$loss and corr.t2$total.colony.size
## S = 2862, p-value = 0.2487
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.2443478
# Rate of loss 2 and total colony size at T3
cor.test(corr.t3$loss,corr.t3$total.colony.size, method="spearman", use="complete.obs")
## 
##  Spearman's rank correlation rho
## 
## data:  corr.t3$loss and corr.t3$total.colony.size
## S = 2296, p-value = 0.9936
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## 0.001739509
# Rate of loss 3 and total colony size at T4
cor.test(corr.t4$loss,corr.t4$total.colony.size, method="spearman", use="complete.obs")
## 
##  Spearman's rank correlation rho
## 
## data:  corr.t4$loss and corr.t4$total.colony.size
## S = 2161.8, p-value = 0.7804
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.0600653

Visualizing rates of loss


Now using same types plots as before, but for loss data. I am excluiding the first time point, and will subset accordingly. And use the same code as above.

#subset
corr.loss <- subset(corr, date != "8.24.18")